/*******************************************************************************
NETS_Sorting_Data.do

This file processes NETS data for Figure 13 and Tables I, II, and V. For M=3, 6,
12, 24, 48, it calculates average employment density across all locations in which 
each firm is located, weighted in three ways: unweighted, weighted by the firm's 
employment in a square, and weighted by the firm's number of establishments 
in a square. For M=12 and using as weights the firm-location's number of 
establishments, it repeats these steps 1) using only non-imputed data and 2)
excluding the own firm contribution to a location's employment density.

Last updated: 5/1/2021
*******************************************************************************/

version 15

// Working directory: every relative path used below is resolved against it.
//cd "C:\Plants_in_Space"
cd "X:\RE.E1D\Nico_datafirms_shared\Plants_in_Space_2021"

// Session settings
set varabbrev off   // variable names must be typed out in full
set type double     // newly generated variables default to double precision
set more off        // never pause output waiting for a keypress
graph set window fontface "Times New Roman"

// Load the cleaned NETS data. The steps below count rows within a firm as
// establishments, so this is presumably one row per establishment (confirm
// against the script that builds this file).
use "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_cleaned.dta", clear

// Export each establishment's identifier, coordinates, and grid-cell IDs (all
// variables matching ID*), with every variable name suffixed by _hq. The
// resulting file is used later to add HQ-location fixed effects to the
// regressions. preserve/restore leaves the data in memory untouched.
preserve
	keep ID* longitude latitude establishment
	rename * *_hq
	save "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_cleaned_HQ_location_data.dta", replace
restore

// Count the establishments operated by each firm (a firm is an hq-sic8 pair)
// and store the result with one row per firm.
preserve
	keep hq sic8
	// Every row of a firm receives that firm's row count
	bysort hq sic8: gen hq_sic8_num_estab = _N
	// Collapse to a single row per firm
	by hq sic8: drop if _n > 1
	save "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_cleaned_hq-sic8_number_plants.dta", replace
restore

/*******************************************************************************
For each M, compute average employment density across all locations in which a
firm is located (weighted in three different ways). In the resulting data, each 
observation is a firm and the variables of interest are log firm employment and 
the log average employment density across locations.

Notes:
  - A firm is an hq-sic8 pair; a location is a grid cell identified by ID_M.
  - egen's wtmean() is not a built-in function. It comes from the
    community-contributed _gwtmean package (ssc install _gwtmean).
  - Whenever one row is kept per group, rows are first ordered within the
    group, so the establishment- and location-level variables carried into the
    saved file (establishment, emp, emp_record_code, ID_M, firm_location_*)
    are the same on every run. Stata's sort orders ties randomly, so without
    this ordering those variables could change between runs. The averages
    themselves do not depend on which row is kept.
*******************************************************************************/
foreach m of numlist 3 6 12 24 48 {
	preserve
		// By firm-location pair, compute employment and number of establishments
		bysort hq sic8 ID_`m': gen firm_location_num_estab = _N
		bysort hq sic8 ID_`m': egen firm_location_employment = total(emp)
		
		// Keep unique firm-location pairs (the row with the lowest establishment
		// value in each pair), then merge with data on employment density.
		// keep(3) retains only pairs whose cell is found in the density file.
		bysort hq sic8 ID_`m' (establishment): keep if _n == 1
		merge m:1 ID_`m' using "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M`m'.dta", keep(3) nogen keepusing(emp_density_`m')
				
		// Calculate average density unweighted, weighted by number of
		// establishments, and weighted by employment
		bysort hq sic8: egen unwght_avg_emp_density_`m' = mean(emp_density_`m')
		bysort hq sic8: egen estab_wght_emp_density_`m' = wtmean(emp_density_`m'), weight(firm_location_num_estab)
		bysort hq sic8: egen emp_wght_emp_density_`m' = wtmean(emp_density_`m'), weight(firm_location_employment)
		
		// Keep unique firm observations. Firm-location pairs are unique at this
		// point, so ordering by ID_`m' selects the same row on every run (the
		// firm's location with the lowest ID_`m').
		bysort hq sic8 (ID_`m'): keep if _n == 1
		
		// Take logs of average density variables
		foreach var of varlist estab_wght_emp_density_`m' unwght_avg_emp_density_`m' emp_wght_emp_density_`m' {
			gen log_`var' = log(`var')
		}
		keep establishment hq sic8 sic2 emp emp_record_code ID_`m' firm_employment ///
			 log_firm_employment firm_location_num_estab firm_location_employment ///
			 unwght_avg_emp_density_`m' estab_wght_emp_density_`m' emp_wght_emp_density_`m' ///
			 log_estab_wght_emp_density_`m' log_unwght_avg_emp_density_`m' log_emp_wght_emp_density_`m'
		save "Data\Final\TablesI_II_V_and_Figure13\firm_average_location_density_M`m'_data.dta", replace
	restore
}

*********Repeat, excluding firm's own contribution to local employment**********
// Same steps as the main loop for M=12 and establishment weights only, except
// that employment density is rebuilt here from total_emp after subtracting the
// firm's own employment in the cell.
// egen's wtmean() comes from the community-contributed _gwtmean package
// (ssc install _gwtmean).
preserve
	// By firm-location pair, compute employment and number of establishments
	bysort hq sic8 ID_12: gen firm_location_num_estab = _N
	bysort hq sic8 ID_12: egen firm_location_employment = total(emp)
	
	// Keep unique firm-location pairs, then merge with data on employment.
	// Rows are ordered by establishment within each pair so that the same row
	// is kept on every run (Stata's sort orders ties randomly).
	bysort hq sic8 ID_12 (establishment): keep if _n == 1
	merge m:1 ID_12 using "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M12.dta", keep(3) nogen keepusing(total_emp)
	
	// Compute employment density, excluding the firm's own employment in the location.
	// The divisor 12^2 is presumably the area of an M=12 cell -- confirm against
	// the script that builds emp_density_12 in the grid data.
	gen emp_density_12 = (total_emp - firm_location_employment) / (12^2)
			
	// Calculate average density weighted by number of establishments
	bysort hq sic8: egen estab_wght_emp_density_12 = wtmean(emp_density_12), weight(firm_location_num_estab)
	
	// Keep unique firm observations. Firm-location pairs are unique at this
	// point, so ordering by ID_12 selects the same row on every run.
	bysort hq sic8 (ID_12): keep if _n == 1
	
	// log() returns missing when the own-excluded average density is not positive
	gen log_estab_wght_emp_density_12 = log(estab_wght_emp_density_12)
	keep establishment hq sic8 sic2 emp emp_record_code ID_12 firm_employment ///
		 log_firm_employment firm_location_num_estab estab_wght_emp_density_12 log_estab_wght_emp_density_12
	save "Data\Final\TablesI_II_V_and_Figure13\firm_average_location_density_M12_excluding_own_contribution_data.dta", replace
restore

**********************Repeat, using non-imputed data only***********************
// Same steps as the main loop for M=12 and establishment weights only, run on
// the subsample with emp_record_code == 0.
// egen's wtmean() comes from the community-contributed _gwtmean package
// (ssc install _gwtmean).
preserve
	// Keep only non-imputed data, and redefine total firm employment (the
	// existing firm_employment variables were calculated including the imputed
	// data)
	keep if emp_record_code == 0
	drop firm_employment log_firm_employment
	bysort hq sic8: egen firm_employment = total(emp)
	gen log_firm_employment = log(firm_employment)

	// By firm-location pair, compute number of establishments	
	bysort hq sic8 ID_12: gen firm_location_num_estab = _N
	
	// Keep unique firm-location pairs, then merge with data on employment density.
	// Rows are ordered by establishment within each pair so that the same row
	// is kept on every run (Stata's sort orders ties randomly).
	bysort hq sic8 ID_12 (establishment): keep if _n == 1
	merge m:1 ID_12 using "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M12.dta", keep(3) nogen keepusing(emp_density_12)

	// Calculate average density weighted by number of establishments
	bysort hq sic8: egen estab_wght_emp_density_12 = wtmean(emp_density_12), weight(firm_location_num_estab)
	
	// Keep unique firm observations. Firm-location pairs are unique at this
	// point, so ordering by ID_12 selects the same row on every run.
	bysort hq sic8 (ID_12): keep if _n == 1

	gen log_estab_wght_emp_density_12 = log(estab_wght_emp_density_12)
	keep establishment hq sic8 sic2 emp emp_record_code ID_12 firm_employment ///
		 log_firm_employment firm_location_num_estab estab_wght_emp_density_12 log_estab_wght_emp_density_12
	save "Data\Final\TablesI_II_V_and_Figure13\firm_average_location_density_M12_nonimputed_data.dta", replace
restore
